

* +++++++++++++++++++++
* CLEAN PRO ASYL GROUPS DATA
* +++++++++++++++++++++

* prep crosswalk: Gemeinde (municipality) key to city name, state, population and NUTS code
* Result: one row per city_id, saved to the tempfile `cw'.
*   city_id   = full AGS key from the raw file
*   ags       = first five characters of that key
*   city_name = lower-cased municipality name (GEN)
*   sn_l      = numeric state code, pop = EWZ, nuts3 = NUTS
use "${data_raw}/geo/VG250_GEM_data.dta", clear

gen ags = substr(AGS, 1, 5)
keep ags SN_L GEN AGS EWZ NUTS
ren (AGS GEN EWZ NUTS) (city_id city_name pop nuts3)
renvars *, lower
destring sn_l, replace

* If the raw file holds several rows for one municipality, keep the row with
* the largest population instead of an arbitrary one. Sorting on the negated
* value puts the largest pop first and rows with missing pop last, so the
* choice is reproducible across runs.
gen double pop_neg = -pop
bys city_id city_name (pop_neg) : keep if _n==1
drop pop_neg
isid city_id

* lower() only affects ASCII letters; the groups list is normalised with the
* same function so both sides of the fuzzy match are treated alike
replace city_name = lower(city_name)

tempfile cw
save `cw'

* clean up list of Pro-Asyl groups
* source: email from Pro-Asyl (Max Klöckner, March 31, 2021)
* NOTE: uses the Unicode string functions ustrtrim()/ustrupper() (Stata 14+).
import excel using "${data_raw}/proasyl_groups/MitmachenKarte.xlsx", clear firstrow

renvars *, lower
* running id in spreadsheet row order, assigned before any rows are dropped
gen initiative_id = _n

* highlight FB links: match any facebook.com host (www., m., de-de., or none)
* regardless of capitalisation; fb_link keeps the URL exactly as entered and
* is empty for non-Facebook links
gen fb_link = linkkontakt if strpos(lower(linkkontakt), "facebook.com") > 0
ren linkkontakt url

drop eingetragen beschreibung
* also drop rows whose name consists only of whitespace
drop if ustrtrim(name) == ""

* create state variable
* Normalise the abbreviation first so that stray blanks or lower-case entries
* (e.g. "BW ") still map to a state instead of silently getting a missing code.
replace bundesland = ustrupper(ustrtrim(bundesland))

* Position in the list = numeric state code (SH = 1, ..., THÜ = 16). The codes
* must line up with sn_l in the city crosswalk, because the later match
* requires equal sn_l on both sides.
local land_codes SH H NS BRE NRW HE RLP BW B SAAR BER BRA MV S SA THÜ
gen sn_l = .
local k = 0
foreach code of local land_codes {
	local ++k
	replace sn_l = `k' if bundesland == "`code'"
}
* alternative spelling used for state 15
replace sn_l = 15 if bundesland == "S-A"

* report (but do not stop on) rows that could not be assigned a state;
* these rows cannot be matched to a city further down
quietly count if missing(sn_l)
if r(N) > 0 {
	display as text "note: " r(N) " group(s) have an unrecognised bundesland code"
}

drop bundesland
ren ort city_name
* same ASCII-only normalisation as applied to the crosswalk names
replace city_name = lower(city_name)

* crosswalk to cities --> state is required, fuzzy on city name
reclink city_name sn_l using `cw', required(sn_l) ///
	idu(city_id) idm(initiative_id) gen(match_p)

* keep only 'good' matches
* (_merge==3 also excludes unmatched rows, whose missing score would
*  otherwise pass the >= test because missing sorts above every number)
keep if match_p >= 0.9 & _merge ==3

* attach population for the tie-break below; assert(2 3) stops the run if a
* matched city_id is not found in the crosswalk
* NOTE(review): reclink appears to carry the crosswalk's other variables along
* already (ags is used below without a separate merge) - confirm
merge m:1 city_id using `cw', assert(2 3) keep(3) keepusing (pop) nogen

* One city per initiative: best match score first, then the largest
* population, then city_id so that exact ties are resolved reproducibly.
* The sort keys are stored as double so that no precision is lost. This
* replaces a separate "drop if match_p < max" step: egen stores the max as
* float by default, which is only safe to compare against if match_p is
* itself stored as float.
gen double match_p_neg = -match_p
gen double pop_neg = -pop
bys initiative_id (match_p_neg pop_neg city_id) : keep if _n==1

* clean up: keep the crosswalk's spelling of the city name (Ucity_name)
keep name Ucity_name initiative_id fb_link city_id ags url
ren Ucity_name city_name

* merge on nuts
merge m:1 city_id using `cw', assert(2 3) keep(3) keepusing (nuts3) nogen
destring city_id ags, replace

* indicator for whether groups already included through Drew's string search
* The name is lower-cased with ustrlower() (Stata 14+): plain lower() leaves
* non-ASCII capitals such as "Ü" untouched, so an all-caps "SEEBRÜCKE" would
* never match the phrase "seebrücke" below.
replace name = ustrlower(name)

* 1 if the lower-cased name contains any of the phrases, 0 otherwise
gen already_in_string_search = 0
foreach phrase in "pro asyl" "seebrücke" "kein mensch ist illegal" ///
	"flüchtlingshilfe" "flüchtlinge willkommen" "refugees welcome" ///
	"ist bunt" "bleibt bunt" "buntes miteinander" ///
	"flüchtlingsrat" "flüchtlingsnetzwerk" ///
	"gegen rechts" "gegen rassismus" {
	replace already_in_string_search = 1 if strpos(name, "`phrase'") != 0
}

* save
save "${data_derived}/list_of_proasyl_groups_clean.dta", replace


